Checking Species Abundance and Biomass for Previous Works

While investigating some discrepancies among different datasets a handful of corrections were made. This document is intended to see if any of those changes have important implications for some of the species-specific reports that have been done recently.

The two datasets being compared here are the survdat_Nye_allseasons.RData and the more recent NEFSC_BTS_all_seasons_03032021.RData.

# Cleanup functions
source(here("R/01_nefsc_ss_build.R"))

####  Survdat resource Load Data


# Data used in 2020 paper
load(paste0(nmfs_path, "Survdat_Nye_allseason.RData"))
survdat_nye  <- survdat %>% clean_names()

# Run cleanup
survdat_nye <- survdat_prep(survdat = survdat_nye) %>% 
  mutate(survdat_source = "survdat_nye")


# Data we just received in 2021 with errors located and corrected
load(paste0(nmfs_path, "NEFSC_BTS_all_seasons_03032021.RData"))
survdat_21 <- survey$survdat %>% clean_names()


# Run cleanup
survdat_21 <- survdat_prep(survdat = survdat_21) %>% 
  mutate(survdat_source = "survdat_2021")

rm(survdat, survey)

####  Load the species list
species_check <- read_csv(here("data/andrew_species/Assesmentfishspecies.csv"))
species_check <- species_check %>% 
  clean_names() %>% 
  mutate(svspp = str_pad(svspp, 3, pad = "0", side = "left"),
         comname = tolower(comname),
         species = str_to_title(species)) %>% 
  arrange(svspp)

Making Comparisons

# Make years comparable
survdat_21 <- survdat_21 %>% 
  filter(est_year %in% unique(survdat_nye$est_year))


# Filter species down for both
survdat_nye <- filter(survdat_nye, svspp %in% species_check$svspp)
survdat_21  <- filter(survdat_21,  svspp %in% species_check$svspp)


# Do some gross abundance or biomass comparisons by species
species_nye <- survdat_nye %>% split(.$comname)
species_21  <- survdat_21 %>% split(.$comname)



# Make  Comparisons

# Vector of species and their common names, atleast the common names Andrew used
andrew_species <- species_check$comname %>% setNames(species_check$svspp)

# Pull the species
species_comparisons <- imap(andrew_species, function(species_comname, species_svspp){
  
  # there are some name mismatches, so catch those
  in_nye <- species_comname %in% names(species_nye)
  in_21  <- species_comname %in% names(species_21)
  in_both <- in_nye & in_21
  if(in_both == FALSE){
    return(list("in_nye" = in_nye, 
                "in_21" = in_21,
                "data" = data.frame(),
                "metrics" = data.frame()))
  }
  
  # Pull just that species from the old data
  old_data_summ <- survdat_nye %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(old_abundance = sum(abundance, na.rm = T),
              old_biomass   = sum(biom_adj, na.rm = T),
              .groups = "keep")
  
   # Pull just that species from the new data
  new_data_summ <- survdat_21 %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(new_abundance = sum(abundance, na.rm = T),
              new_biomass   = sum(biom_adj, na.rm = T),
              .groups = "keep")
  
  # join them for side-by-side data
  combined_data <- left_join(old_data_summ, new_data_summ, by = c("comname", "est_year")) %>% 
    mutate(relative_abund_change = ((new_abundance - old_abundance) / old_abundance) * 100,
           relative_biom_change  = ((new_biomass - old_biomass) / old_biomass) * 100) 
  
  # correlation
  abund_cor   <- cor(combined_data$new_abundance, combined_data$old_abundance, use = "pairwise.complete.obs")
  biomass_cor <- cor(combined_data$new_biomass, combined_data$old_biomass,   use = "pairwise.complete.obs")
  
  # average relative shift
  abund_shift <- mean(combined_data$relative_abund_change, na.rm = T)
  biom_shift  <- mean(combined_data$relative_biom_change, na.rm = T)
  
  # put in list to export
  list("data" = combined_data,
       "metrics" = data.frame("comname"                  = species_comname,
                              "svspp"                    = species_svspp,
                              "abundance_correlation"    = abund_cor,
                              "biomass_correlation"      = biomass_cor,
                              "abundance_percent_change" = abund_shift,
                              "biomass_percent_change"   = biom_shift))
  })


# put data and metrics into a table
comparison_data    <- map_dfr(species_comparisons, ~.x[["data"]]) 
comparison_metrics <- map_dfr(species_comparisons, ~.x[["metrics"]]) 

Abundance

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, abundance_correlation, max, .desc = F)) %>% 
  ggplot(aes(x = abundance_correlation, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Abundances")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, abundance_percent_change, max, .desc = F)) %>% 
  ggplot(aes(x = abundance_percent_change, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    scale_x_continuous(labels = scales::percent_format()) +
    labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(abundance_correlation <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# filter species
corr_sub <- comparison_data %>% 
  filter(comname %in% corr_species) 

# how many species per plot
p1 <- corr_species[1:6]
p2 <- corr_species[-c(1:6)]

# p1
corr_sub %>% 
  filter(comname %in% p1) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, old_abundance, color = "Survdat Nye")) +
  geom_line(aes(est_year, new_abundance, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 1")

# p1
corr_sub %>% 
  filter(comname %in% p2) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, old_abundance, color = "Survdat Nye")) +
  geom_line(aes(est_year, new_abundance, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 2")

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(abundance_percent_change >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, old_abundance, color = "Survdat Nye")) +
  geom_line(aes(est_year, new_abundance, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance",
       title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
       color = "Survdat Source")

Biomass

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, biomass_correlation, max, .desc = F)) %>% 
  ggplot(aes(x = biomass_correlation, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Biomasses")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, biomass_percent_change, max, .desc = F)) %>% 
  ggplot(aes(x = biomass_percent_change, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(biomass_correlation <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# plot
comparison_data %>% 
  filter(comname %in% corr_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, old_biomass, color = "Survdat Nye")) +
  geom_line(aes(est_year, new_biomass, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass", 
       title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
       color = "Survdat Source")

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(biomass_percent_change >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, old_biomass, color = "Survdat Nye")) +
  geom_line(aes(est_year, new_biomass, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass",
       title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
       color = "Survdat Source")

 

A work by Adam A. Kemberling

Akemberling@gmri.org